# Per-state totals of cases and deaths, each tagged with a category label.
# NOTE(review): `sep` is "High Cases" whenever total_cases >= total_deaths;
# confirm this comparison is the intended classification rule.
data_1 <- data %>%
  group_by(state) %>%
  summarize(
    total_cases = sum(cases, na.rm = TRUE),
    total_deaths = sum(deaths, na.rm = TRUE)
  ) %>%
  mutate(
    sep = case_when(
      total_cases >= total_deaths ~ "High Cases",
      TRUE ~ "High Deaths"
    )
  )

# Five states with the largest case totals.
top_cases <- data_1 %>%
  arrange(desc(total_cases)) %>%
  slice_head(n = 5)

# Five states with the largest death totals.
top_deaths <- data_1 %>%
  arrange(desc(total_deaths)) %>%
  slice_head(n = 5)

# Bar chart of the top five states by cases, bars ordered tallest first.
ggplot(
  top_cases,
  aes(x = reorder(state, -total_cases), y = total_cases, fill = sep)
) +
  geom_col() +
  # Show y-axis values with thousands separators.
  scale_y_continuous(labels = label_comma()) +
  labs(
    title = "Top 5 States by Cases",
    x = "State",
    y = "Total Cases",
    fill = "Category"
  ) +
  theme_minimal()
# Bar chart of the top five states by deaths, bars ordered tallest first.
ggplot(
  top_deaths,
  aes(x = reorder(state, -total_deaths), y = total_deaths, fill = sep)
) +
  geom_col() +
  # Show y-axis values with thousands separators.
  scale_y_continuous(labels = label_comma()) +
  labs(
    title = "Top 5 States by Deaths",
    x = "State",
    y = "Total Deaths",
    fill = "Category"
  ) +
  theme_minimal()
4. Facet Plots
# Parse the date column, derive day-of-month and full month name, then total
# cases and deaths for every state / month / day combination.
data_2 <- data %>%
  mutate(date = mdy(date)) %>%
  mutate(
    day = day(date),
    month_label = month(date, label = TRUE, abbr = FALSE)
  ) %>%
  group_by(state, month_label, day) %>%
  summarize(
    total_cases = sum(cases, na.rm = TRUE),
    total_deaths = sum(deaths, na.rm = TRUE),
    .groups = "drop"
  )

# States shown in the facet plots.
selected_states <- c("California", "Texas", "Florida")
data_filtered <- filter(data_2, state %in% selected_states)

# Deaths per day of month, one panel per month, coloured by state.
facet_plot_deaths <- ggplot(
  data_filtered,
  aes(x = day, y = total_deaths, color = state)
) +
  geom_point(size = 2, alpha = 0.8) +
  # Each month gets its own y-axis range.
  facet_wrap(~month_label, scales = "free_y") +
  scale_y_continuous(labels = label_comma()) +
  labs(
    title = "Daily Deaths by State",
    x = "Day of Month",
    y = "Total Deaths",
    color = "State"
  ) +
  theme_minimal() +
  theme(
    axis.text.x = element_text(angle = 45, hjust = 1),
    strip.text = element_text(size = 10)
  )

# Cases per day of month, one panel per month, coloured by state.
facet_plot_cases <- ggplot(
  data_filtered,
  aes(x = day, y = total_cases, color = state)
) +
  geom_point(size = 2, alpha = 0.8) +
  # Each month gets its own y-axis range.
  facet_wrap(~month_label, scales = "free_y") +
  scale_y_continuous(labels = label_comma()) +
  labs(
    title = "Daily Cases by State",
    x = "Day of Month",
    y = "Total Cases",
    color = "State"
  ) +
  theme_minimal() +
  theme(
    axis.text.x = element_text(angle = 45, hjust = 1),
    strip.text = element_text(size = 10)
  )

# Display both plots.
facet_plot_deaths
facet_plot_cases
5. Interactive Line Plot
# Parse `date` and drop rows that lack a date or a death count.
# `data` is overwritten in place, so the parse is guarded: if the column is
# already a Date (e.g. this chunk is re-run), mdy() is expected to fail to
# parse it and yield NA, after which the filter would discard every row.
data <- data %>%
  mutate(date = if (inherits(date, "Date")) date else mdy(date)) %>%
  filter(!is.na(date) & !is.na(deaths))

# Total deaths for every county / date combination.
county_time_series <- data %>%
  group_by(county, date) %>%
  summarise(total_deaths = sum(deaths, na.rm = TRUE), .groups = "drop")

# Names of the five counties with the largest overall death totals.
# NOTE(review): grouping is by county name only, so counties that share a name
# across states are combined -- confirm whether `state` should be part of the key.
top_counties <- data %>%
  group_by(county) %>%
  summarise(total_deaths = sum(deaths, na.rm = TRUE)) %>%
  arrange(desc(total_deaths)) %>%
  slice(1:5) %>%
  pull(county)

# Keep only the time series of those top five counties.
county_time_series_filtered <- county_time_series %>%
  filter(county %in% top_counties)

# Line chart of deaths over time, one line per county.
county_time_series_plot <- ggplot(
  county_time_series_filtered,
  aes(x = date, y = total_deaths, color = county)
) +
  # `linewidth` replaces the `size` argument deprecated for lines in
  # ggplot2 3.4.0.
  geom_line(linewidth = 1) +
  # Show y-axis values with thousands separators.
  scale_y_continuous(labels = label_comma()) +
  labs(
    title = "COVID-19 Deaths Over Time by Top 5 Counties",
    x = "Date",
    y = "Total Deaths",
    color = "County"
  ) +
  theme_minimal()
Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
ℹ Please use `linewidth` instead.
# Render the county time-series ggplot as an interactive Plotly widget.
plotly::ggplotly(county_time_series_plot)
6. Interactive Geospatial Map
# Total cases per state.
state_data <- data %>%
  group_by(state) %>%
  summarize(total_cases = sum(cases, na.rm = TRUE))

# Polygon data for US states (no plotting, filled polygons).
us_states <- maps::map("state", fill = TRUE, plot = FALSE)

# Lower-case state names so they can be matched against the map's names.
state_data <- state_data %>%
  mutate(region = tolower(state))

# Named vector: lower-case state name -> total cases.
state_cases <- setNames(state_data$total_cases, state_data$region)

# The map names multi-part states as "state:part" (e.g. "michigan:north").
# Strip the ":part" suffix so every polygon is looked up under its state name;
# indexing with the raw names returns NA for those states.
polygon_states <- sub(":.*$", "", us_states$names)
polygon_cases <- state_cases[polygon_states]

# One palette shared by the polygons and the legend so both use the same scale.
cases_pal <- colorNumeric("YlOrRd", domain = state_cases)

# Interactive map: polygons shaded by total cases, with a popup per polygon.
leaflet(data = us_states) %>%
  addTiles() %>%
  addPolygons(
    fillColor = ~cases_pal(polygon_cases),
    weight = 1,
    color = "white",
    fillOpacity = 0.7,
    popup = ~paste0(
      "State: ", polygon_states, "<br>",
      "Total Cases: ", polygon_cases
    )
  ) %>%
  addLegend(
    pal = cases_pal,
    values = state_cases,
    title = "Total Cases",
    position = "bottomright"
  )
7. Choropleth Map (Albers Projection)
# Total deaths per state, keyed by lower-case state name for the map join.
# NOTE(review): the name and plot title say 2023, but no year filter is applied
# in this chunk -- confirm `data` is already restricted upstream.
state_deaths_2023 <- data %>%
  group_by(state) %>%
  summarise(total_deaths = sum(deaths, na.rm = TRUE)) %>%
  mutate(state = str_to_lower(state))

# State outline data, with `region` renamed to `state` to match the join key.
states <- map_data("state") %>%
  rename(state = region)

# Attach death totals to every outline vertex.
states_joined <- states %>%
  left_join(state_deaths_2023, by = "state")

# Choropleth of deaths (in thousands) on an Albers projection.
ggplot(states_joined, aes(x = long, y = lat, group = group)) +
  geom_polygon(aes(fill = total_deaths / 1000), color = "black") +
  coord_map(projection = "albers", lat0 = 39, lat1 = 45) +
  # Reversed magma palette for contrast.
  scale_fill_viridis(option = "magma", direction = -1) +
  theme_minimal() +
  labs(
    title = "Total COVID-19 Deaths by State (in thousands), 2023",
    fill = "Deaths (000s)"
  )
8. Choropleth Map
# Total cases per California county, keyed by lower-case county name.
california_cases <- data %>%
  filter(state == "California") %>%
  group_by(county) %>%
  summarise(total_cases = sum(cases, na.rm = TRUE)) %>%
  mutate(county = str_to_lower(county))

# California county outlines, with `subregion` renamed to `county` to match
# the join key.
ca <- map_data("county", region = "california") %>%
  rename(county = subregion)

# Attach case totals to every outline vertex.
ca_joined <- ca %>%
  left_join(california_cases, by = "county")

# Choropleth of total cases by county; counties without data are drawn gray.
ggplot(ca_joined, aes(x = long, y = lat, group = group)) +
  geom_polygon(aes(fill = total_cases), color = "black") +
  coord_quickmap() +
  # Reversed magma palette.
  scale_fill_viridis(option = "magma", direction = -1, na.value = "gray90") +
  theme_minimal() +
  labs(
    title = "Total COVID-19 Cases by County in California (2023)",
    fill = "Cases"
  )